setwd("~/Desktop/Research/NSF/WCT hybridization/WCT_RBT_Genomics/Long_results")
RBT_frequencies <- read.delim("~/Desktop/Research/NSF/WCT hybridization/WCT_RBT_Genomics/Long_results/ConGen_LongTest_data.txt")
data <- RBT_frequencies
L <- length(data$FinRBT) #Number of loci
M <- mean(data$FinRBT) #Average admixture across loci
###perform calculations
Vinv <- matrix(0,L,L)
diag(Vinv) <- 1/(M*(1 - M))
expvar <- mean(data$FinRBT*(1 - data$FinRBT))
MSE <- t(data$FinRBT - M)%*%Vinv%*%(data$FinRBT - M)/(L - 1)
MSEvec <- MSE + expvar*(1/(2*data$FinN) - 1/(2*mean(data$FinN)))
VMi <- data$FinRBT*(1 - data$FinRBT)*MSEvec
###obtain statistical results by locus
data <- transform(data, Fin_chi = (data$FinRBT - M)^2/VMi) #chi-square locus component
data <- transform(data, Fin_resid = (data$FinRBT - M)/sqrt(VMi)) #residual chi-square
data <- transform(data, Fin_effect = (data$Fin_resid)/sqrt(data$FinN)) #effect size
data <- transform(data, Fin_p = pchisq(data$Fin_chi,1,lower.tail=FALSE)) #p-value by locus
###write results to data table
write.table(format(data,scientific=FALSE),file="long_output.txt",quote=FALSE,row.names=FALSE,sep="\t")
L <- length(data$CycCRBT) #Number of loci
M <- mean(data$CycCRBT) #Average admixture across loci
###perform calculations
Vinv <- matrix(0,L,L)
diag(Vinv) <- 1/(M*(1 - M))
expvar <- mean(data$CycCRBT*(1 - data$CycCRBT))
MSE <- t(data$CycCRBT - M)%*%Vinv%*%(data$CycCRBT - M)/(L - 1)
MSEvec <- MSE + expvar*(1/(2*data$CycCN) - 1/(2*mean(data$CycCN)))
VMi <- data$CycCRBT*(1 - data$CycCRBT)*MSEvec
###obtain statistical results by locus
data <- transform(data, CycC_chi = (data$CycCRBT - M)^2/VMi) #chi-square locus component
data <- transform(data, CycC_resid = (data$CycCRBT - M)/sqrt(VMi)) #residual chi-square
data <- transform(data, CycC_effect = (data$CycC_resid)/sqrt(data$CycCN)) #effect size
data <- transform(data, CycC_p = pchisq(data$CycC_chi,1,lower.tail=FALSE)) #p-value by locus
###write results to data table
write.table(format(data,scientific=FALSE),file="long_output.txt",quote=FALSE,row.names=FALSE,sep="\t")
L <- length(data$HayRBT) #Number of loci
M <- mean(data$HayRBT) #Average admixture across loci
###perform calculations
Vinv <- matrix(0,L,L)
diag(Vinv) <- 1/(M*(1 - M))
expvar <- mean(data$HayRBT*(1 - data$HayRBT))
MSE <- t(data$HayRBT - M)%*%Vinv%*%(data$HayRBT - M)/(L - 1)
MSEvec <- MSE + expvar*(1/(2*data$HayN) - 1/(2*mean(data$HayN)))
VMi <- data$HayRBT*(1 - data$HayRBT)*MSEvec
###obtain statistical results by locus
data <- transform(data, Hay_chi = (data$HayRBT - M)^2/VMi) #chi-square locus component
data <- transform(data, Hay_resid = (data$HayRBT - M)/sqrt(VMi)) #residual chi-square
data <- transform(data, Hay_effect = (data$Hay_resid)/sqrt(data$HayN)) #effect size
data <- transform(data, Hay_p = pchisq(data$Hay_chi,1,lower.tail=FALSE)) #p-value by locus
###write results to data table
write.table(format(data,scientific=FALSE),file="long_output.txt",quote=FALSE,row.names=FALSE,sep="\t")
##Calculate combined P value using Fisher's combined probability test
Long_results <- read.delim("~/Desktop/Research/NSF/WCT hybridization/WCT_RBT_Genomics/Long_results/long_output.txt")
data <- Long_results
n <- 3 ##number of P values being combined
df <- 2*n
data <- transform(data, Combined_P = pchisq( -2*(log(data$Fin_p)+log(data$CycC_p)+log(data$Hay_p)), df, lower.tail=FALSE))
write.table(format(data,scientific=FALSE),file="long_output.txt",quote=FALSE,row.names=FALSE,sep="\t")
###Correction for multiple tests
Combined_P <- pchisq( -2*(log(data$Fin_p)+log(data$CycC_p)+log(data$Hay_p)
fdr <- p.adjust(Combined_P, "BH")
sum(fdr < 0.05)
sum(Combined_P < 0.05)
sum(Combined_P < 0.05/9380)